# Load the commit data from CSV (comma-separated, with a header row).
df <- read.csv("input/commits.csv")
df

# Log-transform the counts; the +1 offset keeps zero counts finite.
df[["count"]] <- log(df[["count"]] + 1)
df

# Standardize (center and scale) the log-transformed count column.
cols <- c("count")
df[cols] <- scale(df[cols])
df[cols]

# One-way linear model: standardized log count explained by Group.
mod <- lm(count ~ factor(Group), data = df)
summary(mod)
Call:
lm(formula = count ~ factor(Group), data = df)
Residuals:
Min 1Q Median 3Q Max
-0.9882 -0.7652 -0.2453 0.5215 3.9111
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) -0.08236 0.09075 -0.908 0.3645
factor(Group)1 0.23981 0.12475 1.922 0.0551 .
factor(Group)2 0.06943 0.12497 0.556 0.5787
factor(Group)3 0.01682 0.12235 0.137 0.8907
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 0.9983 on 536 degrees of freedom
Multiple R-squared: 0.008999, Adjusted R-squared: 0.003452
F-statistic: 1.622 on 3 and 536 DF, p-value: 0.1831
# Treat Group and phase as categorical (nominal) variables.
df[["Group"]] <- factor(df[["Group"]])
df[["phase"]] <- factor(df[["phase"]])

library(plyr)

# summary() of count within every Group x phase cell.
ddply(
  .data = df,
  .variables = ~ Group * phase,
  .fun = function(cell) summary(cell[["count"]])
)

# Mean and standard deviation of count within every Group x phase cell.
ddply(
  .data = df,
  .variables = ~ Group * phase,
  .fun = summarise,
  count.mean = mean(count),
  count.sd = sd(count)
)
# Histogram of count for every Group x phase cell.
# Looping over the observed levels replaces one hard-coded hist() call per
# cell, so the plots keep working if the set of groups or phases changes.
# Groups form the outer loop and phases the inner loop, matching the order
# in which the individual histograms were previously drawn.
for (g in levels(factor(df$Group))) {
  for (p in levels(factor(df$phase))) {
    # which() drops rows where Group or phase is NA instead of selecting them.
    in_cell <- which(df$Group == g & df$phase == p)
    cell_counts <- df$count[in_cell]

    # hist() stops with an error when there is nothing finite to bin,
    # so skip combinations that have no usable observations.
    if (!any(is.finite(cell_counts))) {
      next
    }

    hist(
      cell_counts,
      main = paste0("count: Group ", g, ", phase ", p),
      xlab = "count"
    )
  }
}

# Box plots of count for every Group x phase combination.
boxplot(count ~ Group * phase, data = df, xlab = "Group.phase", ylab = "count")

# Interaction plot of the Group x phase cell means.
# count has been centered and scaled earlier in this script, so it takes
# negative values; a y-axis starting at 0 would clip every cell mean below
# zero. Span the full observed range of count instead.
with(
  df,
  interaction.plot(Group, phase, count, ylim = range(count, na.rm = TRUE))
)

# Libraries for the linear mixed model (LMM) fitted to count below
library(lme4)
library(lmerTest)
library(car)
# Use sum-to-zero contrasts for both factors so that the Type III tests
# requested from Anova() below are meaningful.
# These must be assignments (`<-`). The previous `<=` only compared the
# current contrast matrices with the string "contr.sum" and printed a logical
# matrix, leaving the default contrasts in place.
contrasts(df$Group) <- "contr.sum"
contrasts(df$phase) <- "contr.sum"
# Linear mixed model for count: fixed effects of Group and of phase nested
# within Group, plus a random intercept per Student. Fitted by maximum
# likelihood (REML = FALSE).
full.model <- lmer(count ~ Group/phase + (1 | Student), data = df, REML = FALSE)

# Type III tests of the fixed effects.
# The argument is `test.statistic` (singular). The former `test.statistics`
# matched no formal argument, was absorbed by `...`, and the default Wald
# chi-square tests were run. Request that statistic explicitly so the call
# says what it does.
# NOTE(review): if F tests were intended, use test.statistic = "F"; per the car
# documentation this needs the model refitted with REML = TRUE - confirm
# before switching.
Anova(full.model, type = 3, test.statistic = "Chisq")
Analysis of Deviance Table (Type III Wald chisquare tests)
Response: count
Chisq Df Pr(>Chisq)
(Intercept) 6.9240 1 0.008505 **
Group 3.3519 3 0.340483
Group:phase 47.2831 16 6.151e-05 ***
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
LS0tCnRpdGxlOiAiUiBOb3RlYm9vayIKb3V0cHV0OiBodG1sX25vdGVib29rCi0tLQoKCmBgYHtyfQpkZjwtcmVhZC5jc3YoImlucHV0L2NvbW1pdHMuY3N2IiwgaGVhZGVyID1UUlVFLCBzZXA9IiwiKQpkZgpgYGAKCmBgYHtyfQpkZiRjb3VudCA8LSBsb2coZGYkY291bnQrMSkgCmBgYAoKCmBgYHtyfQojIHN0YW5kYXJkaXppbmcgdmFyaWFibGVzIGZvciBza2lsbHMgYW5kIGFzcGlyYXRpb25zLiAKY29scyA8LSBjKCJsb2dfY291bnQiKQpkZltjb2xzXSA8LSBzY2FsZShkZltjb2xzXSkKYGBgCgoKYGBge3J9Cm1vZCA8LSBsbShjb3VudCB+IGZhY3RvcihHcm91cCksIGRhdGEgPSBkZikKc3VtbWFyeShtb2QpCmBgYApgYGB7cn0KIyBjb252ZXJ0IHRvIG5vbWluYWwgZmFjdG9yCmRmJEdyb3VwID0gZmFjdG9yKGRmJEdyb3VwKQpkZiRwaGFzZSA9IGZhY3RvcihkZiRwaGFzZSkKYGBgCgpgYGB7cn0KbGlicmFyeShwbHlyKQpkZHBseShkZiwgfiBHcm91cCAqIHBoYXNlLCBmdW5jdGlvbihkYXRhKSBzdW1tYXJ5KGRhdGEkY291bnQpICkKZGRwbHkoZGYsIH4gR3JvdXAgKiBwaGFzZSwgc3VtbWFyaXNlLCBjb3VudC5tZWFuPW1lYW4oY291bnQpLCBjb3VudC5zZCA9IHNkKGNvdW50KSkKYGBgCmBgYHtyfQojIGhpc3RvZ3JhbXMgZm9yIHR3byBmYWN0b3JzCmhpc3QoZGZbZGYkR3JvdXAgPT0gMCAmIGRmJHBoYXNlID09IDEsXSRjb3VudCApCmhpc3QoZGZbZGYkR3JvdXAgPT0gMCAmIGRmJHBoYXNlID09IDIsXSRjb3VudCApCmhpc3QoZGZbZGYkR3JvdXAgPT0gMCAmIGRmJHBoYXNlID09IDMsXSRjb3VudCApCmhpc3QoZGZbZGYkR3JvdXAgPT0gMCAmIGRmJHBoYXNlID09IDQsXSRjb3VudCApCmhpc3QoZGZbZGYkR3JvdXAgPT0gMCAmIGRmJHBoYXNlID09IDUsXSRjb3VudCApCmhpc3QoZGZbZGYkR3JvdXAgPT0gMSAmIGRmJHBoYXNlID09IDEsXSRjb3VudCApCmhpc3QoZGZbZGYkR3JvdXAgPT0gMSAmIGRmJHBoYXNlID09IDIsXSRjb3VudCApCmhpc3QoZGZbZGYkR3JvdXAgPT0gMSAmIGRmJHBoYXNlID09IDMsXSRjb3VudCApCmhpc3QoZGZbZGYkR3JvdXAgPT0gMSAmIGRmJHBoYXNlID09IDQsXSRjb3VudCApCmhpc3QoZGZbZGYkR3JvdXAgPT0gMSAmIGRmJHBoYXNlID09IDUsXSRjb3VudCApCmhpc3QoZGZbZGYkR3JvdXAgPT0gMiAmIGRmJHBoYXNlID09IDEsXSRjb3VudCApCmhpc3QoZGZbZGYkR3JvdXAgPT0gMiAmIGRmJHBoYXNlID09IDIsXSRjb3VudCApCmhpc3QoZGZbZGYkR3JvdXAgPT0gMiAmIGRmJHBoYXNlID09IDMsXSRjb3VudCApCmhpc3QoZGZbZGYkR3JvdXAgPT0gMiAmIGRmJHBoYXNlID09IDQsXSRjb3VudCApCmhpc3QoZGZbZGYkR3JvdXAgPT0gMiAmIGRmJHBoYXNlID09IDUsXSRjb3VudCApCmhpc3QoZGZbZGYkR3JvdXAgPT0gMyAmIGRmJHBoYXNlID09IDEsXSRjb3VudCApCmhpc3QoZGZbZGYkR3JvdXAgPT0gMyAmIGRmJHBoYXNlID09IDIsXSRjb3VudCApCmhpc3QoZGZbZGYkR3JvdXAgPT0gMyAmIGRm
JHBoYXNlID09IDMsXSRjb3VudCApCmhpc3QoZGZbZGYkR3JvdXAgPT0gMyAmIGRmJHBoYXNlID09IDQsXSRjb3VudCApCmhpc3QoZGZbZGYkR3JvdXAgPT0gMyAmIGRmJHBoYXNlID09IDUsXSRjb3VudCApCmJveHBsb3QoY291bnQgfiBHcm91cCAqIHBoYXNlLCBkYXRhID0gZGYsIHhsYWI9Ikdyb3VwLnBoYXNlIiwgeWxhYj0iY291bnQiKQp3aXRoKGRmLCBpbnRlcmFjdGlvbi5wbG90KEdyb3VwLCBwaGFzZSwgY291bnQsIHlsaW09YygwLCBtYXgoY291bnQpKSkpICMgaW50ZXJhY3Rpb24gcGxvdApgYGAKCmBgYHtyfQojIGxpYnJhcnkgZm9yIExNTSB3ZSB3aWxsIHVzZSBvbiByZWxhdGlvbmFsIG5vdmVsdHkgCgpsaWJyYXJ5KGxtZTQpCmxpYnJhcnkobG1lclRlc3QpCmxpYnJhcnkoY2FyKQpgYGAKCmBgYHtyfQpjb250cmFzdHMoZGYkR3JvdXApIDw9ICJjb250ci5zdW0iCmNvbnRyYXN0cyhkZiRwaGFzZSkgPD0gImNvbnRyLnN1bSIKYGBgCmBgYHtyfQpmdWxsLm1vZGVsID0gbG1lciggY291bnQgfiBHcm91cC9waGFzZSArICgxIHwgU3R1ZGVudCksIGRhdGEgPSBkZiwgUkVNTCA9IEZBTFNFKQpBbm92YShmdWxsLm1vZGVsLCB0eXBlPTMsIHRlc3Quc3RhdGlzdGljcz0iRiIpCmZ1bGwubW9kZWwKYGBgCmBgYHtyfQoKYGBgCgo=